## This file creates election administrative burden indices, ##
## with data compiled by the author ##
## Created by Meredith Dost and last run 8/17/2025 ##

# load in packages
library(scales)

# set working directory
#setwd("burden_data/elections/")

# read in dataset
vburd <- read.csv(file = "votingburden_dataset.csv")

## rescaling variables
# fix early: for each election year 2010-2018, build a 0/1 indicator that is
# 1 where that year's `early` code is 2 or lower and 0 otherwise (rows with a
# missing code stay NA)
for (yr_sfx in c("10", "12", "14", "16", "18")) {
  early_code <- vburd[[paste0("early", yr_sfx)]]
  vburd[[paste0("early_inperson", yr_sfx)]] <- ifelse(early_code <= 2, 1, 0)
}
# rescale voter ID #
# each year's voter ID column is rescaled on its own to the 0-1 range and
# overwritten in place
for (id_col in paste0("voterid", seq(10, 20, by = 2))) {
  vburd[[id_col]] <- rescale(vburd[[id_col]], to = c(0, 1))
}
# computing var with registration deadline
# reg_daysBEyy = number of days from the date in regdateYY (parsed as
# month/day/two-digit-year) to the reference date hard-coded below for that
# year (presumably that year's election day -- confirm)
reference_dates <- c(
  "10" = "2010-11-02",
  "12" = "2012-11-06",
  "14" = "2014-11-04",
  "16" = "2016-11-08",
  "18" = "2018-11-06",
  "20" = "2020-11-03"
)
for (yr_sfx in names(reference_dates)) {
  deadline <- as.Date(
    as.character(vburd[[paste0("regdate", yr_sfx)]]),
    format = "%m/%d/%y"
  )
  vburd[[paste0("reg_daysBE", yr_sfx)]] <-
    as.numeric(as.Date(reference_dates[[yr_sfx]]) - deadline)
}
# rescaling
# reg_dlYY is that year's reg_daysBEyy rescaled on its own to the 0-1 range
for (yr_sfx in c("10", "12", "14", "16", "18", "20")) {
  days_before <- vburd[[paste0("reg_daysBE", yr_sfx)]]
  vburd[[paste0("reg_dl", yr_sfx)]] <- rescale(days_before, to = c(0, 1))
}
# creating auto-registration variables for each year
# 2010-2014: the indicator is set to 0 for every row
for (yr_sfx in c("10", "12", "14")) {
  vburd[[paste0("autoreg", yr_sfx)]] <- 0
}
# 2016-2020: 1 when autoreg_yearimplem is that year or earlier, otherwise 0;
# rows with a missing autoreg_yearimplem are also coded 0
for (cutoff in c(2016, 2018, 2020)) {
  adopted <- ifelse(vburd$autoreg_yearimplem <= cutoff, 1, 0)
  adopted[is.na(adopted)] <- 0
  vburd[[paste0("autoreg", cutoff - 2000)]] <- adopted
}
## converting data from wide to long format ##
varlabs <- c("noexcuse_absentee","perm_absentee","voterid","onlinereg","samedayreg","early_inperson","reg_daysBE","reg_dl","autoreg")
colnames(vburd)[2] <- "state"

# Build the long-format slice for one election year:
#   wide      - the wide data frame (column 2 must be the state column)
#   suffix    - pattern passed to grep() to pick that year's columns
#   drop_cols - positions (within the picked columns) to discard
#   labels    - names given to columns 2 through 10 after the drop
#   year      - value stored in the new `year` column
# NOTE(review): the selection is purely positional and the grep pattern is
# unanchored, so it relies on the column order/names of the input CSV --
# verify if the dataset layout changes.
extract_year_panel <- function(wide, suffix, drop_cols, labels, year) {
  picked <- wide[, c(2, grep(suffix, colnames(wide)))]
  picked <- picked[, -drop_cols]
  colnames(picked)[2:10] <- labels
  picked$year <- year
  picked
}

# 2010-2018: positions 2 and 7 are dropped (presumably the raw early and
# regdate columns -- confirm against the CSV)
vburd10 <- extract_year_panel(vburd, "10", c(2, 7), varlabs, 2010)
vburd12 <- extract_year_panel(vburd, "12", c(2, 7), varlabs, 2012)
vburd14 <- extract_year_panel(vburd, "14", c(2, 7), varlabs, 2014)
vburd16 <- extract_year_panel(vburd, "16", c(2, 7), varlabs, 2016)
vburd18 <- extract_year_panel(vburd, "18", c(2, 7), varlabs, 2018)
# 2020: only position 7 is dropped and early_inperson is labelled first
# (presumably the 2020 indicator already exists in the CSV -- confirm)
vburd20 <- extract_year_panel(vburd, "20", 7, varlabs[c(6, 1:5, 7:9)], 2020)

### combining into long dataset
vburd_long <- rbind.data.frame(
  vburd10, vburd12, vburd14, vburd16, vburd18, vburd20
)

## electoral burden indices ##
# reverse-code these indicators (x becomes 1 - x) before they are summed
for (policy in c("onlinereg", "autoreg", "samedayreg",
                 "early_inperson", "perm_absentee")) {
  vburd_long[[policy]] <- 1 - vburd_long[[policy]]
}

# raw sums: four registration components, three turnout components
reg_total <- with(vburd_long, onlinereg + autoreg + samedayreg + reg_dl)
turn_total <- with(vburd_long, voterid + early_inperson + perm_absentee)

# registration and turnout burden on a 0-1 scale (sum / number of components)
vburd_long$regidx <- reg_total / 4
vburd_long$turnidx <- turn_total / 3
# combined index for robustness checks: all seven components, averaged
vburd_long$cov <- (reg_total + turn_total) / 7

# subset data to only variables we need
vburd_sub <- vburd_long[, c("state", "year", "regidx", "turnidx", "cov")]

##### READING IN Cost of Voting Index (COVI) DATA ####
## source: COVI Values 1996-2024 website.xlsx from https://costofvotingindex.com/data ##
# NOTE(review): the script reads covi_subset.csv, presumably an extract of
# that workbook -- confirm how the subset file was produced
covi <- read.csv("covi_subset.csv")

# left join on state and year: every row of vburd_sub is kept, and COVI
# columns are NA where there is no matching state-year
# (TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned)
burd_merge <- merge(vburd_sub, covi, by = c("state", "year"), all.x = TRUE)

## save out
#setwd("burden_data/")
write.csv(burd_merge, "electburden_measures.csv", row.names = FALSE)



